In [1]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import re

# Load the two result tables used by the rest of the notebook.
# The paths are relative, so they resolve against the current working directory.
xgb_df = pd.read_csv('results/xgb.csv')
ollama_df = pd.read_csv('results/ollama.csv')
In [2]:
import plotly.io as pio

# Make "notebook" the default Plotly renderer for the fig.show() calls below.
pio.renderers.default = "notebook"
In [3]:
# Render xgb_df in the cell output using the notebook's rich display.
display(xgb_df)
machine CPU GPU python platform bench dataset_rows gpu train_median_s infer_median_s auc seed timestamp
0 PC_AL_2025 AMD Ryzen 5 9600X Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 xgboost 100000 True 0.80 0.01 0.81023 42 2025-07-23T21:02:39
1 PC_AL_2025 AMD Ryzen 5 9600X Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 xgboost 100000 False 0.59 0.01 0.81072 42 2025-07-23T21:02:43
2 PC_AL_2025 AMD Ryzen 5 9600X Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 xgboost 1000000 True 1.60 0.01 0.82277 42 2025-07-23T21:02:54
3 PC_AL_2025 AMD Ryzen 5 9600X Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 xgboost 1000000 False 3.99 0.10 0.82276 42 2025-07-23T21:03:11
4 PC_AL_2025 AMD Ryzen 5 9600X Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 xgboost full True 9.93 0.06 0.82486 42 2025-07-23T21:04:25
5 PC_AL_2025 AMD Ryzen 5 9600X Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 xgboost full False 50.37 1.04 0.82495 42 2025-07-23T21:07:42
6 PC_AL_2015 Intel i5-4690K Nvidia GTX 1060 6GB 3.13.5 Windows-10-10.0.19045-SP0 xgboost 100000 True 1.66 0.01 0.80975 42 2025-07-23T23:13:28
7 PC_AL_2015 Intel i5-4690K Nvidia GTX 1060 6GB 3.13.5 Windows-10-10.0.19045-SP0 xgboost 100000 False 1.79 0.03 0.80892 42 2025-07-23T23:13:36
8 PC_AL_2015 Intel i5-4690K Nvidia GTX 1060 6GB 3.13.5 Windows-10-10.0.19045-SP0 xgboost 1000000 True 5.80 0.04 0.82262 42 2025-07-23T23:14:04
9 PC_AL_2015 Intel i5-4690K Nvidia GTX 1060 6GB 3.13.5 Windows-10-10.0.19045-SP0 xgboost 1000000 False 13.69 0.32 0.82227 42 2025-07-23T23:14:57
10 PC_AL_2015 Intel i5-4690K Nvidia GTX 1060 6GB 3.13.5 Windows-10-10.0.19045-SP0 xgboost full True 45.20 0.38 0.82495 42 2025-07-23T23:18:54
11 PC_AL_2015 Intel i5-4690K Nvidia GTX 1060 6GB 3.13.5 Windows-10-10.0.19045-SP0 xgboost full False 147.38 3.46 0.82489 42 2025-07-23T23:28:06
12 Mark1 AMD Ryzen 7 9800X3D Nvidia GTX 4080 SUPER 3.13.5 Linux-5.15.167.4-microsoft-standard-WSL2-x86_6... xgboost 100000 True 0.88 0.01 0.81023 42 2025-07-26T12:04:36
13 Mark1 AMD Ryzen 7 9800X3D Nvidia GTX 4080 SUPER 3.13.5 Linux-5.15.167.4-microsoft-standard-WSL2-x86_6... xgboost 100000 False 1.66 0.01 0.80963 42 2025-07-26T12:04:42
14 Mark1 AMD Ryzen 7 9800X3D Nvidia GTX 4080 SUPER 3.13.5 Linux-5.15.167.4-microsoft-standard-WSL2-x86_6... xgboost 1000000 True 1.15 0.01 0.82277 42 2025-07-26T12:04:51
15 Mark1 AMD Ryzen 7 9800X3D Nvidia GTX 4080 SUPER 3.13.5 Linux-5.15.167.4-microsoft-standard-WSL2-x86_6... xgboost 1000000 False 1.67 0.06 0.82306 42 2025-07-26T12:05:03
16 Mark1 AMD Ryzen 7 9800X3D Nvidia GTX 4080 SUPER 3.13.5 Linux-5.15.167.4-microsoft-standard-WSL2-x86_6... xgboost full True 5.42 0.03 0.82486 42 2025-07-26T12:06:10
17 Mark1 AMD Ryzen 7 9800X3D Nvidia GTX 4080 SUPER 3.13.5 Linux-5.15.167.4-microsoft-standard-WSL2-x86_6... xgboost full False 31.07 0.62 0.82495 42 2025-07-26T12:08:42
18 PC_AL_2025_6000 AMD Ryzen 5 9600X mem-opt Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 xgboost 100000 True 0.77 0.01 0.81023 42 2025-07-27T14:22:19
19 PC_AL_2025_6000 AMD Ryzen 5 9600X mem-opt Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 xgboost 100000 False 0.55 0.01 0.81072 42 2025-07-27T14:22:23
20 PC_AL_2025_6000 AMD Ryzen 5 9600X mem-opt Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 xgboost 1000000 True 1.47 0.01 0.82277 42 2025-07-27T14:22:36
21 PC_AL_2025_6000 AMD Ryzen 5 9600X mem-opt Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 xgboost 1000000 False 3.87 0.10 0.82276 42 2025-07-27T14:23:00
22 PC_AL_2025_6000 AMD Ryzen 5 9600X mem-opt Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 xgboost full True 9.83 0.05 0.82486 42 2025-07-27T14:24:33
23 PC_AL_2025_6000 AMD Ryzen 5 9600X mem-opt Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 xgboost full False 47.16 1.03 0.82495 42 2025-07-27T14:29:17
In [4]:
# Human-readable machine label: "<CPU> + <GPU>"
xgb_df["machine_label"] = xgb_df["CPU"].add(" + ").add(xgb_df["GPU"])

# Acceleration label: stringify the "gpu" flag, then rename "True"/"False"
gpu_flag_text = xgb_df["gpu"].astype(str)
xgb_df["acc_label"] = gpu_flag_text.replace({"True": "GPU", "False": "CPU"})

# ------------------ Build dynamic and formatted dataset_rows labels -------------
def format_thousands(s: str) -> str:
    """Return the integer written in ``s`` with '.' as thousands separator.

    Args:
        s: Text that ``int()`` can parse, e.g. ``"1000000"``.

    Returns:
        The number grouped in thousands with dots, e.g. ``"1.000.000"``.

    Raises:
        ValueError: If ``int()`` cannot parse ``s``.
    """
    # format() groups with ','; swap that for '.'
    grouped = format(int(s), ",")
    return grouped.replace(",", ".")

# Work on the textual form of dataset_rows: the column mixes digit strings with 'full'
rows_as_text = xgb_df["dataset_rows"].astype(str)
unique_rows = rows_as_text.unique()

# Purely numeric sizes, ascending
numeric_vals = sorted(int(v) for v in unique_rows if re.fullmatch(r"\d+", v))

# Display labels for the numeric sizes, in the same ascending order
labels_formatted = [format_thousands(str(v)) for v in numeric_vals]

# Raw value -> display label ('full' maps to itself).
# Any other value has no entry and becomes NaN when mapped below.
mapping_label = dict(zip(map(str, numeric_vals), labels_formatted))
mapping_label["full"] = "full"

# 'full' goes after every numeric size in the category order
if "full" in unique_rows:
    labels_formatted.append("full")

# Ordered categorical so the axis follows ascending size, then 'full'
xgb_df["rows_label"] = pd.Categorical(
    rows_as_text.map(mapping_label),
    categories=labels_formatted,
    ordered=True,
)

# ------------------ Machine-related dynamic parts ------------------
# Distinct machine labels in sorted order
machine_labels = sorted(xgb_df["machine_label"].unique())

# One fill pattern per machine; wraps around when machines outnumber the shapes
patterns_cycle = ["", "/", "x", "\\", "-", "|", "+", "."]
pattern_map = {}
for position, label in enumerate(machine_labels):
    pattern_map[label] = patterns_cycle[position % len(patterns_cycle)]

# Bar colour per acceleration type (first two entries of the Pastel palette)
pastel = px.colors.qualitative.Pastel
color_map = {"GPU": pastel[0], "CPU": pastel[1]}

# ------------------ Build traces ------------------
# One horizontal bar trace per (machine, acceleration) pair that has rows.
# meta[i] records which machine / acceleration produced traces[i].
traces = []
meta = []

for machine in machine_labels:
    is_machine = xgb_df["machine_label"] == machine
    for acc in ["GPU", "CPU"]:
        subset = xgb_df[is_machine & (xgb_df["acc_label"] == acc)]
        if subset.empty:
            continue

        train_times = subset["train_median_s"]
        hover = f"{machine}<br>%{{y}} righe · {acc}<br>%{{x:.2f}} s<extra></extra>"
        bar = go.Bar(
            y=subset["rows_label"],
            x=train_times,
            orientation="h",
            showlegend=False,
            marker=dict(
                color=color_map[acc],
                pattern=dict(shape=pattern_map[machine]),
            ),
            # value printed next to each bar, two decimals
            text=train_times.map("{:.2f}".format).tolist(),
            textposition="outside",
            hovertemplate=hover,
        )

        traces.append(bar)
        meta.append({"machine": machine, "acc": acc})

fig = go.Figure(data=traces)

# ------------------ Combined filter menu ------------------
# A Plotly "update" button only replays its own args; it knows nothing about
# the selection made in another menu. Two separate dropdowns that both write
# `visible` therefore overwrite each other: choosing "Solo GPU" and then a
# machine brought that machine's CPU bars back while the first menu still
# showed "Solo GPU". A single menu with one entry per machine/acceleration
# combination keeps the selected entry, the visible traces and the title in sync.
default_title = "Tempo di training – tutte le accelerazioni e macchine"

# (filter value, button label, title fragment); a filter value of None matches everything.
acc_options = [
    (None, "Tutte le accelerazioni", "tutte le accelerazioni"),
    ("GPU", "Solo GPU", "solo GPU"),
    ("CPU", "Solo CPU", "solo CPU"),
]
machine_options = [(None, "Tutte le macchine", "tutte le macchine")]
machine_options += [(machine, machine, machine) for machine in machine_labels]

filter_buttons = []
for machine_key, machine_text, machine_title in machine_options:
    for acc_key, acc_text, acc_title in acc_options:
        # One flag per trace, in the same order as `meta` (and so as the figure's traces).
        visible = [
            (machine_key is None or m["machine"] == machine_key)
            and (acc_key is None or m["acc"] == acc_key)
            for m in meta
        ]
        if not any(visible):
            # No trace matches this combination; a button for it would only blank the chart.
            continue

        if machine_key is None and acc_key is None:
            title = default_title
        else:
            title = f"Tempo di training – {machine_title}, {acc_title}"

        filter_buttons.append(
            dict(
                label=f"{machine_text} · {acc_text}",
                method="update",
                # Address the title through the explicit `title.text` path rather
                # than the bare-string `title` shorthand.
                args=[{"visible": visible}, {"title.text": title}],
            )
        )

dropdown_filters = dict(
    buttons=filter_buttons,
    direction="down",
    x=1.02,
    y=1,
    xanchor="left",
    yanchor="top",
    showactive=True,
    bgcolor="white",
    bordercolor="lightgray",
)

# ------------------ Layout updates ------------------
fig.update_layout(
    barmode="group",
    title=default_title,
    xaxis_title="Train time [s]",
    yaxis_title="Numero di righe del dataset",
    # No menu at all when there is nothing to filter
    updatemenus=[dropdown_filters] if filter_buttons else [],
    # Figure height grows with the number of result rows: 40 px each
    height=len(xgb_df) * 40,
    margin=dict(r=200),  # space for the menu
)

# Keep the axis categories in ascending size order, with 'full' last
fig.update_yaxes(categoryorder='array', categoryarray=labels_formatted)

fig.show()
In [5]:
# Render ollama_df in the cell output using the notebook's rich display
# (the output below shows a truncated view of the 64 rows).
display(ollama_df)
machine CPU GPU python platform bench model gpu wall_min_s wall_med_s wall_max_s tok_min_s tok_med_s tok_max_s seed timestamp
0 PC_AL_2025 AMD Ryzen 5 9600X Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 ollama phi3:3.8b True 6.37 6.45 14.81 133.99 136.25 136.62 42 2025-07-23T21:08:10
1 PC_AL_2025 AMD Ryzen 5 9600X Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 ollama phi3:3.8b False 31.59 34.68 34.70 21.43 21.44 21.55 42 2025-07-23T21:09:52
2 PC_AL_2025 AMD Ryzen 5 9600X Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 ollama qwen3:4b True 13.93 14.19 19.01 105.24 106.41 107.55 42 2025-07-23T21:10:39
3 PC_AL_2025 AMD Ryzen 5 9600X Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 ollama qwen3:4b False 92.86 92.92 142.73 12.89 13.18 13.19 42 2025-07-23T21:16:08
4 PC_AL_2025 AMD Ryzen 5 9600X Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 ollama qwen3:14b True 26.28 26.34 29.25 40.56 40.74 40.75 42 2025-07-23T21:17:30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
59 PC_AL_2025_6000 AMD Ryzen 5 9600X mem-opt Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 ollama gemma3n:e2b False 10.59 10.62 12.10 31.21 31.34 31.40 42 2025-07-27T15:09:52
60 PC_AL_2025_6000 AMD Ryzen 5 9600X mem-opt Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 ollama deepseek-r1:8b True 19.55 20.60 20.68 66.85 67.15 67.28 42 2025-07-27T15:11:34
61 PC_AL_2025_6000 AMD Ryzen 5 9600X mem-opt Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 ollama deepseek-r1:8b False 98.82 98.94 141.58 9.58 9.65 9.66 42 2025-07-27T15:20:33
62 PC_AL_2025_6000 AMD Ryzen 5 9600X mem-opt Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 ollama deepseek-r1:14b True 15.84 15.86 23.47 39.88 40.14 40.14 42 2025-07-27T15:22:00
63 PC_AL_2025_6000 AMD Ryzen 5 9600X mem-opt Nvidia RTX 5060 Ti 16GB 3.13.5 Windows-11-10.0.26100-SP0 ollama deepseek-r1:14b False 82.42 82.44 111.48 6.78 6.80 6.81 42 2025-07-27T15:29:22

64 rows × 16 columns

In [6]:
# Acceleration label: stringify the "gpu" flag, then rename "True"/"False"
gpu_flag_text = ollama_df["gpu"].astype(str)
ollama_df["acc_label"] = gpu_flag_text.replace({"True": "GPU", "False": "CPU"})

# Machine label = "<CPU> + <GPU>"
ollama_df["machine_label"] = ollama_df["CPU"].add(" + ").add(ollama_df["GPU"])

# Unique models in order of first appearance (not alphabetical);
# this order becomes the category order of the "model" column
model_order = ollama_df["model"].drop_duplicates().tolist()
ollama_df["model"] = pd.Categorical(
    ollama_df["model"],
    categories=model_order,
    ordered=True,
)

# Distinct machine labels in sorted order
machine_labels = sorted(ollama_df["machine_label"].unique())

# One fill pattern per machine; wraps around when machines outnumber the shapes
patterns_cycle = ["", "/", "x", "\\", "-", "|", "+", "."]
pattern_map = dict(
    (machine, patterns_cycle[i % len(patterns_cycle)])
    for i, machine in enumerate(machine_labels)
)

# Bar colour per acceleration type (first two entries of the Pastel palette)
color_map = dict(
    GPU=px.colors.qualitative.Pastel[0],
    CPU=px.colors.qualitative.Pastel[1],
)

# ------------------ Build traces ------------------
# One horizontal bar trace per (machine, acceleration) pair that has rows.
# meta[i] records which machine / acceleration produced traces[i].
traces = []
meta = []

for machine in machine_labels:
    is_machine = ollama_df["machine_label"] == machine
    for acc in ["GPU", "CPU"]:  # fixed order
        subset = ollama_df[is_machine & (ollama_df["acc_label"] == acc)]
        if subset.empty:
            continue

        # tok_med_s: tokens per second; "med" is presumably the median, since the
        # column sits between tok_min_s and tok_max_s — TODO confirm with the benchmark script
        tokens_per_s = subset["tok_med_s"]
        hover = f"{machine}<br>Modello: %{{y}} · {acc}<br>%{{x:.2f}} token/s<extra></extra>"
        bar = go.Bar(
            y=subset["model"],
            x=tokens_per_s,
            orientation="h",
            showlegend=False,
            marker=dict(
                color=color_map[acc],
                pattern=dict(shape=pattern_map[machine]),
            ),
            # value printed next to each bar, two decimals
            text=tokens_per_s.map("{:.2f}".format).tolist(),
            textposition="outside",
            hovertemplate=hover,
        )

        traces.append(bar)
        meta.append({"machine": machine, "acc": acc})

fig = go.Figure(data=traces)

# ------------------ Combined filter menu ------------------
# A Plotly "update" button only replays its own args; it knows nothing about
# the selection made in another menu. Two separate dropdowns that both write
# `visible` therefore overwrite each other: choosing "Solo GPU" and then a
# machine brought that machine's CPU bars back while the first menu still
# showed "Solo GPU". A single menu with one entry per machine/acceleration
# combination keeps the selected entry, the visible traces and the title in sync.
default_title = "Token al secondo – tutte le accelerazioni e macchine"

# (filter value, button label, title fragment); a filter value of None matches everything.
acc_options = [
    (None, "Tutte le accelerazioni", "tutte le accelerazioni"),
    ("GPU", "Solo GPU", "solo GPU"),
    ("CPU", "Solo CPU", "solo CPU"),
]
machine_options = [(None, "Tutte le macchine", "tutte le macchine")]
machine_options += [(machine, machine, machine) for machine in machine_labels]

filter_buttons = []
for machine_key, machine_text, machine_title in machine_options:
    for acc_key, acc_text, acc_title in acc_options:
        # One flag per trace, in the same order as `meta` (and so as the figure's traces).
        visible = [
            (machine_key is None or m["machine"] == machine_key)
            and (acc_key is None or m["acc"] == acc_key)
            for m in meta
        ]
        if not any(visible):
            # No trace matches this combination; a button for it would only blank the chart.
            continue

        if machine_key is None and acc_key is None:
            title = default_title
        else:
            title = f"Token al secondo – {machine_title}, {acc_title}"

        filter_buttons.append(
            dict(
                label=f"{machine_text} · {acc_text}",
                method="update",
                # Address the title through the explicit `title.text` path rather
                # than the bare-string `title` shorthand.
                args=[{"visible": visible}, {"title.text": title}],
            )
        )

dropdown_filters = dict(
    buttons=filter_buttons,
    direction="down",
    x=1.02,
    y=1,
    xanchor="left",
    yanchor="top",
    showactive=True,
    bgcolor="white",
    bordercolor="lightgray",
)

# ------------------ Layout ------------------
fig.update_layout(
    barmode="group",
    title=default_title,
    xaxis_title="Token/s",
    yaxis_title="Modello LLM",
    # No menu at all when there is nothing to filter
    updatemenus=[dropdown_filters] if filter_buttons else [],
    # Figure height grows with the number of result rows: 40 px each
    height=len(ollama_df) * 40,
    margin=dict(r=220),  # space for the menu
)

fig.show()